/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package opennlp.tools.parse_thicket.rhetoric_structure;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import opennlp.tools.parse_thicket.IGeneralizer;
import opennlp.tools.parse_thicket.Pair;
import opennlp.tools.parse_thicket.ParseTreeNode;


/**
 * Detects rhetoric-structure (RST) discourse markers in a sentence.
 *
 * <p>Each marker is a named relation ("contrast", "antithesis", "elaboration",
 * "explanation", "example", "temp_sequence") paired with a template: a short
 * sequence of {@link ParseTreeNode}s built from a word and a POS tag, where
 * {@code "*"} is used as a wildcard placeholder. A template is applied to a
 * sentence by generalizing it, node by node, against consecutive sentence words.
 */
public class RhetoricStructureMarker implements IGeneralizer<Integer[]>  {

	/** Relation name paired with its template, in the order the templates are tried. */
	List<Pair<String, ParseTreeNode[]>> rstMarkers = new ArrayList<Pair<String, ParseTreeNode[]>>();

	/** Registers the built-in marker templates. */
	public RhetoricStructureMarker() {
		addMarker("contrast", new ParseTreeNode(",", ","), new ParseTreeNode("than", ","));
		addMarker("antithesis", new ParseTreeNode("although", ","), new ParseTreeNode("*", "*"));
		addMarker("contrast", new ParseTreeNode(",", ","), new ParseTreeNode("however", "*"));
		addMarker("contrast", new ParseTreeNode("however", "*"), new ParseTreeNode(",", ","),
				new ParseTreeNode("*", "prp"));
		addMarker("elaboration", new ParseTreeNode(",", ","), new ParseTreeNode("*", "NN"));
		addMarker("elaboration", new ParseTreeNode("as", "*"), new ParseTreeNode("a", "*"));

		addMarker("explanation", new ParseTreeNode(",", ","), new ParseTreeNode("because", ","));
		addMarker("example", new ParseTreeNode("for", "IN"), new ParseTreeNode("example", "NN"));
		// ", yet": the lemma used to be misspelled "ye", unlike the two "yet" templates below
		addMarker("contrast", new ParseTreeNode(",", ","), new ParseTreeNode("yet", "*"));
		addMarker("contrast", new ParseTreeNode("yet", "*"), new ParseTreeNode(",", ","),
				new ParseTreeNode("*", "prp"));

		addMarker("contrast", new ParseTreeNode("yet", "*"), new ParseTreeNode("i", "*"));

		addMarker("explanation", new ParseTreeNode(",", ","), new ParseTreeNode("where", "*"));
		// "as long as": an adverb followed by "as"
		addMarker("temp_sequence", new ParseTreeNode("*", "RB"), new ParseTreeNode("as", "IN"));
		addMarker("temp_sequence", new ParseTreeNode("*", "VB*"), new ParseTreeNode("until", "IN"));
	}

	/** Appends one relation/template pair to {@link #rstMarkers}. */
	private void addMarker(String relation, ParseTreeNode... template) {
		rstMarkers.add(new Pair<String, ParseTreeNode[]>(relation, template));
	}

	/**
	 * Applies every registered template to the sentence.
	 *
	 * @param sentence the words of one sentence
	 * @return one entry per template that occurs in the sentence: the relation name
	 *         and a two-element span {@code {start, end}} (inclusive word indices) of
	 *         the first occurrence; entries follow template registration order
	 */
	public List<Pair<String, Integer[]>> extractRSTrelationInSentenceGetBoundarySpan(List<ParseTreeNode> sentence){
		List<Pair<String, Integer[]>> results = new ArrayList<Pair<String, Integer[]>>();

		for (Pair<String, ParseTreeNode[]> marker : rstMarkers) {
			List<Integer[]> spans = generalize(sentence, marker.getSecond());
			if (!spans.isEmpty()) {
				results.add(new Pair<String, Integer[]>(marker.getFirst(), spans.get(0)));
			}
		}
		return results;
	}

	/**
	 * Rule application in the form of generalization: generalizing a sentence with
	 * a rule (a template) yields the occurrence of a rhetoric marker.
	 *
	 * @param o1 the sentence, a {@code List<ParseTreeNode>}
	 * @param o2 the rule/template, a {@code ParseTreeNode[]} specifying lemmas and/or
	 *           POS, including punctuation
	 * @return a list holding the single span {@code {start, end}} (inclusive word
	 *         indices) of the first occurrence of the template, or an empty list when
	 *         the template does not occur, is empty, or either argument is null
	 * @see opennlp.tools.parse_thicket.IGeneralizer#generalize(java.lang.Object, java.lang.Object)
	 */
	@Override
	public List<Integer[]> generalize(Object o1, Object o2) {
		List<Integer[]> result = new ArrayList<Integer[]>();
		if (o1 == null || o2 == null) {
			return result;
		}

		@SuppressWarnings("unchecked") // callers pass the sentence as List<ParseTreeNode>
		List<ParseTreeNode> sentence = (List<ParseTreeNode>) o1;
		ParseTreeNode[] template = (ParseTreeNode[]) o2;

		// An empty template would otherwise "match" trivially and yield the span {0, -1}.
		if (template.length == 0) {
			return result;
		}

		// A full match needs template.length words, so later start positions cannot succeed.
		int lastStart = sentence.size() - template.length;
		for (int start = 0; start <= lastStart; start++) {
			if (matchesAt(sentence, start, template)) {
				result.add(new Integer[]{start, start + template.length - 1});
				return result; // only the first occurrence is reported
			}
		}
		return result;
	}

	/** Tells whether every template node generalizes with the sentence words starting at {@code start}. */
	private boolean matchesAt(List<ParseTreeNode> sentence, int start, ParseTreeNode[] template) {
		for (int offset = 0; offset < template.length; offset++) {
			ParseTreeNode templateNode = template[offset];
			ParseTreeNode sentenceNode = sentence.get(start + offset);
			List<ParseTreeNode> generalized = templateNode.generalize(templateNode, sentenceNode);
			if (generalized.isEmpty() || generalized.get(0) == null) {
				return false;
			}
			ParseTreeNode common = generalized.get(0);
			// a result of word "*" and POS "*" carries nothing in common: treated as no match
			if (common.getWord().equals("*") && common.getPos().equals("*")) {
				return false;
			}
		}
		return true;
	}

	/**
	 * Renders detected markers for display.
	 *
	 * @param res markers as returned by
	 *            {@link #extractRSTrelationInSentenceGetBoundarySpan(List)}
	 * @return text of the form {@code [relation:start end  | relation:start end  | ]}
	 */
	public String markerToString(List<Pair<String, Integer[]>> res){
		StringBuilder buf = new StringBuilder("[");
		for (Pair<String, Integer[]> marker : res) {
			buf.append(marker.getFirst()).append(':');
			for (Integer boundary : marker.getSecond()) {
				buf.append(boundary).append(' ');
			}
			buf.append(" | ");
		}
		return buf.append(']').toString();
	}

	/** Small demo: prints the markers found in a hand-built sentence. */
	public static void main(String[] args){
		ParseTreeNode[] sent = new ParseTreeNode[]{
				new ParseTreeNode("he", "prn"), new ParseTreeNode("was", "vbz"), new ParseTreeNode("more", "jj"),
				new ParseTreeNode(",", ","), new ParseTreeNode("than", ","), new ParseTreeNode("little", "jj"),
				new ParseTreeNode("boy", "nn"),
				new ParseTreeNode(",", ","), new ParseTreeNode("however", "*"), new ParseTreeNode(",", ","),
				new ParseTreeNode("he", "prp"), new ParseTreeNode("was", "vbz"), new ParseTreeNode("adult", "jj")
		};

		RhetoricStructureMarker marker = new RhetoricStructureMarker();
		List<Pair<String, Integer[]>> res = marker.extractRSTrelationInSentenceGetBoundarySpan(Arrays.asList(sent));
		System.out.println(marker.markerToString(res));
	}
}
